#' Outline
#' I. Anticipation/displacement stuff
#' I.0. Redundant, see IV.6 
#' I.1. Only consider first protest
#' I.2. Monthly volumes
#' 
#' II. Multiple treatments
#' II.3. Interaction of protest with cumulative protests
#' 
#' III. Parallel trends
#' III.4a Interact number of tweets in pre-period with protests
#' III.4b MP time trends
#' III.4c Greta's protests as placebo 
#' 
#' IV. 5. Event study framework, with more earlier days in the analysis


library(tidyverse)
library(broom) # tidy regression outputs
library(fixest) # fixest::feols() is very fast with big data and fixed effects
library(patchwork) # for combining plots
library(ggthemes) # for ggthemes
library(knitr) # for making regression tables in Latex
library(magrittr)
library(modelsummary)
shift <- data.table::shift # for lagging/leading variables

#### Load data ####

mp_df <- readRDS("data/analysis/mp_df.rds")

#### I.1 Only consider the first protest ####

first_protest_tweets_month <- mp_df %>% 
  mutate(shift_cumulative = lag(cumulative_fff_protests_all_time, n = 1)) %>% 
  filter(shift_cumulative < 1) %>% 
  feols(sum_ctweets ~ fff_event + sum_tweets |
          full_name + year_month,
        data = .) 

first_protest_tweets_week <- mp_df %>% 
  mutate(shift_cumulative = lag(cumulative_fff_protests_all_time, n = 1)) %>% 
  filter(shift_cumulative < 1) %>% 
  feols(sum_ctweets ~ fff_event + sum_tweets |
          full_name + year_week,
        data = .) 

first_protest_speeches_month <- mp_df %>% 
  mutate(shift_cumulative = lag(cumulative_fff_protests_all_time, n = 1)) %>% 
  filter(shift_cumulative < 1) %>% 
  feols(sum_ctweets ~ fff_event + sum_tweets |
          full_name + year_month,
        data = .) 

first_protest_speeches_week <- mp_df %>% 
  mutate(shift_cumulative = lag(cumulative_fff_protests_all_time, n = 1)) %>% 
  filter(shift_cumulative < 1) %>% 
  feols(sum_cspchs ~ fff_event + sum_spchs |
          full_name + year_week,
        data = .) 


#### 1.2 Monthly volumes ####

time_fe_only_tweets <- lm(sum_ctweets ~ sum_tweets + as.factor(year_month), data = mp_df)
time_fe_only_speeches <- lm(sum_cspchs ~ sum_spchs + as.factor(year_month), data = mp_df)

plot_tweet_time_fe <- time_fe_only_tweets %>% 
  tidy() %>% 
  filter(str_detect(term, "as.factor")) %>% 
  mutate(term = str_sub(term, start = 22L, end = -1L)) %>% 
  separate(term, into = c("year", "month")) %>% 
  mutate(time_labels = paste(year, month, sep = "-")) %>% 
  mutate_at(vars(year, month), ~as.integer(.)) %>% 
  arrange(year, month) %>% 
  mutate(index = row_number()) %>% 
  mutate(outcome = "Tweets")

plot_speech_time_fe <- time_fe_only_speeches %>% 
  tidy() %>% 
  filter(str_detect(term, "as.factor")) %>% 
  mutate(term = str_sub(term, start = 22L, end = -1L)) %>% 
  separate(term, into = c("year", "month")) %>% 
  mutate(time_labels = paste(year, month, sep = "-")) %>% 
  mutate_at(vars(year, month), ~as.integer(.)) %>% 
  arrange(year, month) %>% 
  mutate(index = row_number()) %>% 
  mutate(outcome = "Speeches")

bind_rows(plot_tweet_time_fe, plot_speech_time_fe) %>% 
  ggplot(., aes(index, estimate, shape = outcome, color = outcome)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_linerange(aes(ymin = estimate - 1.96 * std.error, 
                     ymax = estimate + 1.96 * std.error),
                 position = position_dodge(width = 0.5), size=0.75) +
  geom_point(aes(shape = outcome), 
             position = position_dodge(width = 0.5), size = 2) +
  scale_shape_manual(values = c(19,18)) +
  scale_color_manual(values=c("#79b321","#344d0e")) +
  scale_x_continuous(breaks = seq(1, 30, 6),
                     labels = plot_tweet_time_fe$time_labels[seq(1, 30, 6)]) +
  theme_bw() +
  labs(x = "", y = "Estimate and 95% CIs", 
       shape = "Outcome",
       color = "Outcome") +
  theme(legend.position = "bottom",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

ggsave("plots/figa3.png", height = 6, width = 8, dpi = 600)



#### II.3. Interaction of protest with cumulative protests ####
## Interact number of cumulative protests with local protest

cumulative_protests_tweets_month <- mp_df %>% 
  filter(!is.na(fff_events_cumulative1)) %>% # Remove day 1 to match observations in main model
  feols(sum_ctweets ~ fff_event*cumulative_fff_protests_all_time +
          sum_tweets |
          full_name + year_month,
        data = .) 

cumulative_protests_tweets_week <- mp_df %>% 
  filter(!is.na(fff_events_cumulative1)) %>% 
  feols(sum_ctweets ~ fff_event*cumulative_fff_protests_all_time +
          sum_tweets |
          full_name + year_week,
        data = .) 

cumulative_protests_speeches_month <- mp_df %>% 
  filter(!is.na(fff_events_cumulative1)) %>% 
  feols(sum_cspchs ~ fff_event*cumulative_fff_protests_all_time +
          sum_spchs |
          full_name + year_month,
        data = .) 

cumulative_protests_speeches_week <- mp_df %>% 
  filter(!is.na(fff_events_cumulative1)) %>% 
  feols(sum_cspchs ~ fff_event*cumulative_fff_protests_all_time +
          sum_spchs |
          full_name + year_week,
        data = .) 


#### III.4a Interact number of tweets in pre-period with protests ####

## Interact FFF with pre-FFF climate speech

## Generate a cumulative count for the amount of times an MP tweeted by climate protests
# And then interact that with protests

mp_pre_period_characteristics <- mp_df %>% 
  group_by(full_name, cumulative_fff_protests_all_time) %>% 
  mutate(pre_fff_ctweets = sum(sum_ctweets),
         pre_fff_ctweets = ifelse(cumulative_fff_protests_all_time>=1, 0, pre_fff_ctweets)) %>% 
  mutate(pre_fff_cspchs = sum(sum_cspchs),
         pre_fff_cspchs = ifelse(cumulative_fff_protests_all_time>=1, 0, pre_fff_cspchs)) %>% 
  ungroup() %>% 
  group_by(full_name) %>% 
  mutate(pre_fff_ctweets = max(pre_fff_ctweets),
         pre_fff_cspchs = max(pre_fff_cspchs)) %>% 
  mutate(pre_jan2019 = ifelse(date < "2019-01-01", "Pre", "Post")) %>% 
  group_by(full_name, pre_jan2019) %>% 
  mutate(sum_ctweets_to_jan2019 = ifelse(pre_jan2019 == "Pre", sum(sum_ctweets), 0),
         sum_cspchs_to_jan2019 = ifelse(pre_jan2019 == "Pre", sum(sum_cspchs), 0)) %>% 
  group_by(full_name) %>% 
  mutate(sum_ctweets_to_jan2019 = max(sum_ctweets_to_jan2019),
         sum_cspchs_to_jan2019 = max(sum_cspchs_to_jan2019)) 

## Run model interacting number of climate tweets before first protest with protest incidence
pre_period_characteristics_tweets <- mp_pre_period_characteristics %>% 
  filter(!is.na(fff_events_cumulative1)) %>% 
  feols(sum_ctweets ~ fff_event*pre_fff_ctweets +
          sum_tweets |
          full_name + year_week,
        data = .) 

pre_period_characteristics_speeches <- mp_pre_period_characteristics %>% 
  filter(!is.na(fff_events_cumulative1)) %>% 
  feols(sum_cspchs ~ fff_event*pre_fff_cspchs +
          sum_spchs |
          full_name + year_week,
        data = .) 

#### III.4b MP time trends, parallel trends #### 

## MP time trends

trends_tweets_month <- mp_df %>%
  filter(!is.na(fff_events_cumulative1)) %>% 
  mutate(time_trend = as.numeric(as.factor(year_month))) %>% 
  feols(sum_ctweets ~ fff_event +
          sum_tweets |
          full_name[time_trend] + year_month,
        data = .)

trends_tweets_week <- mp_df %>%
  filter(!is.na(fff_events_cumulative1)) %>% 
  mutate(time_trend = as.numeric(as.factor(year_week))) %>% 
  feols(sum_ctweets ~ fff_event +
          sum_tweets |
          full_name[time_trend] + year_week,
        data = .)

trends_speeches_month <- mp_df %>%
  filter(!is.na(fff_events_cumulative1)) %>% 
  mutate(time_trend = as.numeric(as.factor(year_month))) %>% 
  feols(sum_cspchs ~ fff_event +
          sum_spchs |
          full_name[time_trend] + year_month,
        data = .)

trends_speeches_week <- mp_df %>%
  filter(!is.na(fff_events_cumulative1)) %>% 
  mutate(time_trend = as.numeric(as.factor(year_week))) %>% 
  feols(sum_cspchs ~ fff_event +
          sum_spchs |
          full_name[time_trend] + year_week,
        data = .)



#### III.4c Greta's protests as placebo ####

## Fridays before UK protests
placebo_tweets <- mp_df %>% 
  mutate(friday_greta = ifelse((wday(date)==6 & date > "2018-08-15"), 1, 0)) %>% 
  filter(date < "2019-01-01") %>% 
  feols(sum_ctweets ~ friday_greta + sum_tweets |
          full_name + year_week,
        data = .) 

placebo_speeches <- mp_df %>% 
  mutate(friday_greta = ifelse((wday(date)==6 & date > "2018-08-15"), 1, 0)) %>% 
  filter(date < "2019-01-01") %>% 
  feols(sum_cspchs ~ friday_greta + sum_spchs |
          full_name + year_week,
        data = .) 


#### Combine robustness test results into table ####

gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  # "r.squared", "R2",            2,
  "nobs",      "Observations",             0,
  "FE: full_name",  "MP fixed effect",      0,
  "FE: year_month", "Year-month fixed effect", 0,
  "FE: year_week", "Year-week fixed effect", 0)

# Table 1
modelsummary(models = list(first_protest_tweets_week, 
                           cumulative_protests_tweets_week,
                           pre_period_characteristics_tweets,
                           trends_tweets_week, 
                           placebo_tweets),
             output = "latex",
             coef_rename = c(
               "fff_event" = "Local FFF protest",
               "cumulative_fff_protests_all_time" = "Cumulative FFF protests to date",
               "pre_fff_ctweets" = "Pre-period climate tweets",
               "sum_tweets" = "Tweets (daily sum)",
               "friday_greta" = "Placebo FFF protests from Sweden",
               "fff_event:pre_fff_ctweets" = "FFF events * pre-period climate tweets",
               "fff_event:cumulative_fff_protests_all_time" = "FFF events * cumulative FFF protests",
               "frontbench" = "Frontbench"),
             gof_map = gm, 
             notes = list('Outcome is count of climate tweets',
                          'Standard errors clustered by MP'),
             title = 'Robustness tests for Fridays for Future protest on MPs’ tweets',
             stars = c("*" = 0.05, "**" = 0.01))


modelsummary(list(first_protest_speeches_week,
                  cumulative_protests_speeches_week,
                  pre_period_characteristics_speeches,
                  trends_speeches_week,
                  placebo_speeches),
             output = "latex",
             coef_rename = c(
               "fff_event" = "Local FFF protest",
               "cumulative_fff_protests_all_time" = "Cumulative FFF protests to date",
               "pre_fff_cspchs" = "Pre-period climate speeches",
               "sum_spchs" = "Speeches (daily sum)",
               "friday_greta" = "Placebo FFF protests from Sweden",
               "fff_event:pre_fff_cspchs" = "FFF events * pre-period climate speeches",
               "fff_event:cumulative_fff_protests_all_time" = "FFF events * cumulative FFF protests",
               "frontbench" = "Frontbench"),
             gof_map = gm, 
             notes = list('Outcome is count of climate speeches',
                          'Standard errors clustered by MP'),
             title = 'Robustness tests for Fridays for Future protest on MPs’ speeches',
             stars = c("*" = 0.05, "**" = 0.01))



#### IV.5. Event study framework ####

## Re-shape data
mp_event_framework <- mp_df %>% 
  select(full_name, date, 
         sum_tweets, sum_ctweets,
         sum_spchs, sum_cspchs,
         fff_event) %>% 
  mutate(date = ymd(date)) %>% 
  mutate(year = year(date),
         month = month(date),
         week = isoweek(date), # Calendar weeks, starting on Monday
         year_month = paste(year, month, sep = "_"),
         year_week = paste(year, week, sep = "_")) %>% 
  # Group by MP
  group_by(full_name) %>% 
  # Create a "lead" FFF event indicator, for investigating anticipation effects
  do(data.frame(., setNames(shift(.$fff_event, 5:1, type = "lead"),
                            paste0('fff_event_lead', 5:1)))) %>% 
  # Create a "lag" FFF event indicator, for investigating anticipation effects
  do(data.frame(., setNames(shift(.$fff_event, 5:1, type = "lag"), 
                            paste0('fff_event_lag', 5:1)))) %>% 
  # Replace these lags/leads with 0 if there is another FFF at that time
  mutate_at(vars(fff_event_lead5:fff_event_lead1),
            ~replace(., fff_event == 1, 0)) %>%
  mutate_at(vars(fff_event_lag5:fff_event_lag1),
            ~replace(., fff_event == 1, 0)) %>%
  # Create event dummies
  mutate(fff_event_minus5 = ifelse(fff_event_lead5 == 1, 1, 0),
         fff_event_minus4 = ifelse(fff_event_lead4 == 1, 1, 0),
         fff_event_minus3 = ifelse(fff_event_lead3 == 1, 1, 0),
         fff_event_minus2 = ifelse(fff_event_lead2 == 1, 1, 0),
         fff_event_minus1 = ifelse(fff_event_lead1 == 1, 1, 0),
         fff_event_plus1 = ifelse(fff_event_lag1 == 1, 1, 0),
         fff_event_plus2 = ifelse(fff_event_lag2 == 1, 1, 0),
         fff_event_plus3 = ifelse(fff_event_lag3 == 1, 1, 0),
         fff_event_plus4 = ifelse(fff_event_lag4 == 1, 1, 0),
         fff_event_plus5 = ifelse(fff_event_lag5 == 1, 1, 0)) %>% 
  # # Normalize pre-treatment lead
  # mutate(fff_event_minus5=1) %>% 
  select(full_name, date, year_month, year_week,
         sum_tweets, sum_ctweets,
         sum_spchs, sum_cspchs,
         starts_with("fff_event_minus"), fff_event, starts_with("fff_event_plus"))

## Run models
event_study_tweets_month <- feols(sum_ctweets ~ 
                                    fff_event_minus5 + fff_event_minus4 + fff_event_minus3 + fff_event_minus2 + fff_event_minus1 +
                                    fff_event + 
                                    fff_event_plus1 + fff_event_plus2 + fff_event_plus3 + fff_event_plus4 + fff_event_plus5 +
                                    sum_tweets |
                                    full_name + year_month,
                                  data = mp_event_framework)
event_study_tweets_week <- feols(sum_ctweets ~ 
                                   fff_event_minus5 + fff_event_minus4 + fff_event_minus3 + fff_event_minus2 + fff_event_minus1 +
                                   fff_event + 
                                   fff_event_plus1 + fff_event_plus2 + fff_event_plus3 + fff_event_plus4 + fff_event_plus5 +
                                   sum_tweets |
                                   full_name + year_week,
                                 data = mp_event_framework)
event_study_speeches_month <- feols(sum_cspchs ~ 
                                      fff_event_minus5 + fff_event_minus4 + fff_event_minus3 + fff_event_minus2 + fff_event_minus1 +
                                      fff_event + 
                                      fff_event_plus1 + fff_event_plus2 + fff_event_plus3 + fff_event_plus4 + fff_event_plus5 +
                                      sum_spchs |
                                      full_name + year_month,
                                    data = mp_event_framework)

event_study_speeches_week <- feols(sum_cspchs ~ 
                                     fff_event_minus5 + fff_event_minus4 + fff_event_minus3 + fff_event_minus2 + fff_event_minus1 +
                                     fff_event + 
                                     fff_event_plus1 + fff_event_plus2 + fff_event_plus3 + fff_event_plus4 + fff_event_plus5 +
                                     sum_spchs |
                                     full_name + year_week,
                                   data = mp_event_framework)

  
## Bind tweets and speeches

event_study_plot <- bind_rows(event_study_tweets_week %>%
            tidy() %>% 
            as.data.frame() %>% 
            filter(term!="sum_tweets") %>% 
            mutate(outcome = "Tweets",
                   index = row_number()),
          event_study_speeches_week %>% 
            tidy() %>% 
            as.data.frame() %>% 
            filter(term!="sum_spchs") %>% 
            mutate(outcome = "Speeches",
                   index = row_number())) %>% 
  # mutate(index = ifelse(outcome == "Tweets", index-.1, index+.1)) %>% 
  ggplot(., aes(index, estimate, shape = outcome, color = outcome)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 6, linetype = 2) +
  geom_linerange(aes(ymin = estimate - 1.96 * std.error, 
                     ymax = estimate + 1.96 * std.error),
                 position = position_dodge(width = 0.5), size=0.75) +
  geom_point(aes(shape = outcome), 
             position = position_dodge(width = 0.5), size = 2) +
  scale_shape_manual(values = c(19,18)) +
  scale_color_manual(values=c("#79b321","#344d0e")) +
  scale_x_continuous(breaks = seq(1, 11, 1),
                     labels = c("-5\nSu", "-4\nM", "-3\nTu",
                                "-2\nW", "-1\nTh", 
                                "0\nFriday", "1\nSa", 
                                "2\nSu", "3\nM", 
                                "4\nTu", "5\nW")) +
  theme_tufte(base_family = "Arial") +
  theme(legend.position = "bottom",
        legend.text = element_text(size=15),
        legend.title = element_text(size=15),
        axis.title.x = element_text(size=15),
        axis.text.x = element_text(size=15),
        axis.title.y = element_text(size=15),
        axis.text.y = element_text(size=15),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        panel.grid.major = element_blank()) +
  labs(x = "Days relative to protest",
       y = "Estimate and 95% CIs",
       shape = "Outcome",
       color = "Outcome")
ggsave(event_study_plot, filename = "plots/figa2.png", height = 6, width = 8, dpi = 600)

